Filter Performance and Stability

Measure performance across three comparisons: Standard Filter vs. Square Root Filter, CPU vs. GPU, and batched vs. non-batched execution.

KalmanFilterPerformance

 KalmanFilterPerformance (n_obs=100, n_dim_obs=4, n_dim_state=3,
                          n_dim_contr=3, bs=5, p_missing=0.3,
                          init_method='random', use_sr_filter=True,
                          device='cpu', use_conditional=True,
                          use_batch=True, **kwargs)

Initialize self. See help(type(self)) for accurate signature.

kf = KalmanFilterPerformance(p_missing=0)
kf.time_method('filter')
0.14900339799987705

source

product_dict

 product_dict (**kwargs)

perf_comb_params

 perf_comb_params (method, n_obs=100, n_dim_obs=4, n_dim_state=3,
                   n_dim_contr=3, bs=5, p_missing=0.3,
                   init_method='random', use_sr_filter=True, device='cpu',
                   use_conditional=True, use_batch=True)
perf_comb_params('filter')
shape: (1, 10)
bs device n_dim_contr n_dim_obs n_dim_state n_obs time use_batch use_conditional use_sr_filter
i64 str i64 i64 i64 i64 f64 bool bool bool
5 "cpu" 3 4 3 100 0.254086 true true true

SR vs Normal Filter

# Time the 'filter' method for both filter variants (use_sr_filter True/False).
# NOTE(review): the table displayed for this call reports 200 rows with `rep`
# values up to 99, which does not obviously match `rep=range(2)` — confirm
# which settings generated the shown output.
perf1 = perf_comb_params('filter', use_sr_filter=[True, False], rep=range(2))
perf1
shape: (200, 11)
bs device n_dim_contr n_dim_obs n_dim_state n_obs rep time use_batch use_conditional use_sr_filter
i64 str i64 i64 i64 i64 i64 f64 bool bool bool
5 "cpu" 3 4 3 100 0 0.274235 true true true
5 "cpu" 3 4 3 100 1 0.265272 true true true
5 "cpu" 3 4 3 100 2 0.261474 true true true
5 "cpu" 3 4 3 100 3 0.257838 true true true
5 "cpu" 3 4 3 100 4 0.262304 true true true
5 "cpu" 3 4 3 100 5 0.269207 true true true
5 "cpu" 3 4 3 100 6 0.252871 true true true
5 "cpu" 3 4 3 100 7 0.262902 true true true
5 "cpu" 3 4 3 100 8 0.320611 true true true
5 "cpu" 3 4 3 100 9 0.359268 true true true
5 "cpu" 3 4 3 100 10 0.351303 true true true
5 "cpu" 3 4 3 100 11 0.349618 true true true
... ... ... ... ... ... ... ... ... ... ...
5 "cpu" 3 4 3 100 88 0.251293 true true false
5 "cpu" 3 4 3 100 89 0.258562 true true false
5 "cpu" 3 4 3 100 90 0.249729 true true false
5 "cpu" 3 4 3 100 91 0.25231 true true false
5 "cpu" 3 4 3 100 92 0.254119 true true false
5 "cpu" 3 4 3 100 93 0.2571 true true false
5 "cpu" 3 4 3 100 94 0.252648 true true false
5 "cpu" 3 4 3 100 95 0.251427 true true false
5 "cpu" 3 4 3 100 96 0.249157 true true false
5 "cpu" 3 4 3 100 97 0.252483 true true false
5 "cpu" 3 4 3 100 98 0.250241 true true false
5 "cpu" 3 4 3 100 99 0.252376 true true false
# Mean run time per filter variant, with a readable label for each variant.
(
    perf1
    .groupby('use_sr_filter')
    .agg(pl.col("time").mean())
    .with_column(
        pl.when(pl.col("use_sr_filter"))
        .then(pl.lit("Square Root Filter"))
        .otherwise(pl.lit("Standard Filter"))
        .alias("Filter type")
    )
)
shape: (2, 3)
use_sr_filter time Filter type
bool f64 str
true 0.268333 "Square Root Fi...
false 0.250166 "Standard Filte...
# Attach a human-readable label for the filter variant to every timing row.
perf1 = perf1.with_column(
    pl.when(pl.col("use_sr_filter"))
    .then(pl.lit("Square Root Filter"))
    .otherwise(pl.lit("Standard Filter"))
    .alias("Filter type")
)

# Box plot of the run-time distribution, one box per filter variant.
plot_perf_sr = (
    alt.Chart(perf1.to_pandas())
    .mark_boxplot(size=50)
    .encode(
        x=alt.X('Filter type', axis=alt.Axis(labelAngle=0)),
        y=alt.Y('time', scale=alt.Scale(zero=False), title="time [s]"),
        color=alt.Color('Filter type', scale=alt.Scale(scheme='accent')),
    )
    .properties(width=300)
)
plot_perf_sr

CPU vs GPU

# Time the 'filter' method over device (cpu/cuda), filter variant and batching.
# NOTE(review): the table displayed for this call lists bs=100, n_obs=50 and
# `rep` values up to 19 (160 rows), which does not match the arguments passed
# here (bs=1, n_obs=5, rep=2) — confirm which settings generated the output.
gpu_best = perf_comb_params('filter', bs=1, n_obs=5, n_dim_contr=5, n_dim_obs=5, n_dim_state=5,
                            device=['cpu', 'cuda'], use_sr_filter=[True, False], p_missing=0, rep=2, use_batch=[True, False])
gpu_best
shape: (160, 12)
bs device n_dim_contr n_dim_obs n_dim_state n_obs p_missing rep time use_batch use_conditional use_sr_filter
i64 str i64 i64 i64 i64 i64 i64 f64 bool bool bool
100 "cpu" 5 5 5 50 0 0 0.134151 true true true
100 "cpu" 5 5 5 50 0 1 0.097733 true true true
100 "cpu" 5 5 5 50 0 2 0.074773 true true true
100 "cpu" 5 5 5 50 0 3 0.07477 true true true
100 "cpu" 5 5 5 50 0 4 0.092972 true true true
100 "cpu" 5 5 5 50 0 5 0.112495 true true true
100 "cpu" 5 5 5 50 0 6 0.106605 true true true
100 "cpu" 5 5 5 50 0 7 0.103665 true true true
100 "cpu" 5 5 5 50 0 8 0.094165 true true true
100 "cpu" 5 5 5 50 0 9 0.105327 true true true
100 "cpu" 5 5 5 50 0 10 0.096838 true true true
100 "cpu" 5 5 5 50 0 11 0.080813 true true true
... ... ... ... ... ... ... ... ... ... ... ...
100 "cuda" 5 5 5 50 0 8 8.77501 false true false
100 "cuda" 5 5 5 50 0 9 10.170727 false true false
100 "cuda" 5 5 5 50 0 10 11.289223 false true false
100 "cuda" 5 5 5 50 0 11 11.012791 false true false
100 "cuda" 5 5 5 50 0 12 9.845103 false true false
100 "cuda" 5 5 5 50 0 13 9.998321 false true false
100 "cuda" 5 5 5 50 0 14 10.513864 false true false
100 "cuda" 5 5 5 50 0 15 9.192036 false true false
100 "cuda" 5 5 5 50 0 16 10.006169 false true false
100 "cuda" 5 5 5 50 0 17 8.915112 false true false
100 "cuda" 5 5 5 50 0 18 8.832785 false true false
100 "cuda" 5 5 5 50 0 19 9.227368 false true false
gpu_best.groupby(['device', 'use_batch']).agg(pl.col("time").mean())
shape: (4, 3)
device use_batch time
str bool f64
"cuda" false 9.602944
"cpu" false 4.560856
"cuda" true 0.274758
"cpu" true 0.083738
# Pass every value through `tuplify`, then list what `product_dict` yields.
kwargs = {'a': 1, 'b': (1, 2)}
kwargs = {name: tuplify(value) for name, value in kwargs.items()}
list(product_dict(**kwargs))
[{'a': 1, 'b': 1}, {'a': 1, 'b': 2}]
from timeit import timeit

# Total time, in seconds, of ten calls to the method obtained from `kf`.
method = kf.get_method('filter')
timeit(
    'method()',
    globals={'method': method},
    number=10,
)
0.15532574900134932

Performance

def compare_performance(n_obs, n_dim_obs, n_dim_state, n_dim_contr, bs, dtype=torch.float64):
    """Set up a random Kalman filter and test data on GPU and on CPU, printing a label per configuration.

    The visible body builds a `KalmanFilter.init_random(...)` model and
    `get_test_data(...)` inputs twice (first moved to / created on CUDA, then
    with default placement) and prints the labels "GPU", "CPU",
    "No batches CPU" and "No batches GPU". Nothing is returned.

    NOTE(review): no timing statement is visible in this body, although the
    recorded output shows a timing line after each label — presumably the
    measurement statements were dropped when this page was rendered; confirm
    against the notebook source.
    NOTE(review): `n_obs` and `n_dim_contr` are not referenced in the visible
    body — confirm whether they should be forwarded to the constructors.
    NOTE(review): the example calls shown after this definition pass four
    positional arguments, but `bs` has no default here — confirm the intended
    signature.
    """
    # GPU setup: model moved to CUDA, test data created on the "cuda" device.
    kf_cuda = KalmanFilter.init_random(n_dim_obs,n_dim_state, dtype=dtype).cuda()
    data_cuda, mask_cuda = get_test_data(n_dim_obs,n_dim_state, bs=bs, device="cuda", dtype=dtype)
    
    print("GPU")


    # CPU setup: same constructors without `.cuda()` / `device="cuda"`.
    # NOTE(review): these rebind the `*_cuda` names although nothing here places them on CUDA.
    kf_cuda = KalmanFilter.init_random(n_dim_obs,n_dim_state, dtype=dtype)
    data_cuda, mask_cuda = get_test_data(n_dim_obs,n_dim_state, bs=bs, dtype=dtype)
    print("CPU")

    print("No batches CPU")

    print("No batches GPU")
compare_performance(100, 2,2,100)
GPU
87.9 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
7.83 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
12.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
154 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
compare_performance(200, 10,10,200)
GPU
2.04 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
7.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.5 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
2.07 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)

Float64

compare_performance(100, 2,2,100, dtype=torch.float64)
GPU
100 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
8.29 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.9 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
159 ms ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
compare_performance(200, 10,10,200, dtype=torch.float64)
GPU
2.22 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
CPU
8.35 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches CPU
13.7 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)
No batches GPU
2.01 s ± 0 ns per loop (mean ± std. dev. of 1 run, 1 loop each)

Stability

    kSR.Q_raw = torch.nn.Parameter(kSR.Q_raw + eye_like(kSR.Q_raw) * torch.sqrt(torch.tensor(1e-5)))
    kSR.R_raw = torch.nn.Parameter(kSR.R_raw + eye_like(kSR.R_raw) * torch.sqrt(torch.tensor(1e-5)))
    kSR.P0_raw = torch.nn.Parameter(kSR.P0_raw + eye_like(kSR.P0_raw) * torch.sqrt(torch.tensor(1e-5)))

fuzz_filter_SR

 fuzz_filter_SR (n_iter=10, n_obs=50)
# Run the fuzzing experiment and summarise the MAE distribution for each `t`.
err_raw = fuzz_filter_SR(10, 120)
err = err_raw.groupby('t').agg([
    pl.col('MAE').median().alias("median"),
    pl.col('MAE').quantile(.75).alias("Q3"),
    pl.col('MAE').quantile(.25).alias("Q1"),
    pl.col('MAE').max().alias("max"),
])

# Convert to pandas once and share the frame between all chart layers.
err_pd = err.to_pandas()

# Median line; it carries the axis titles and the log scale.
median = alt.Chart(err_pd).mark_line(color="black").encode(
    x=alt.X('t', title="Number of Iterations"),
    y=alt.Y('median', axis=alt.Axis(format=".1e"), scale=alt.Scale(type="log"), title="log MAE"),
    strokeDash=datum("median"),
)

# Quartile lines share one legend entry ("quantile").
Q1 = alt.Chart(err_pd).mark_line(color='dimgray', strokeDash=[4,6]).encode(x='t', y='Q1', strokeDash=datum("quantile"))
Q3 = alt.Chart(err_pd).mark_line(color='dimgray', strokeDash=[4,6]).encode(x='t', y='Q3', strokeDash=datum("quantile"))
# Named `max_line` rather than `max` so the builtin `max` is not shadowed.
max_line = alt.Chart(err_pd).mark_line(color='black', strokeDash=[2,2]).encode(x='t', y='max', strokeDash=datum("max"))

# Layer order is kept from the original: quartiles, maximum, then median.
p = (Q1 + Q3 + max_line + median).interactive().properties(title="Standard Filter vs Square Root Filter (Mean Absolute Error of state covariances)")
p

plot_err_sr_filter

 plot_err_sr_filter (err_raw)